home *** CD-ROM | disk | FTP | other *** search
/ Libris Britannia 4 / science library(b).zip / science library(b) / PROGRAMM / CC_C / 0335.ZIP / CUT.C < prev    next >
Text File  |  1985-05-08  |  10KB  |  468 lines

  1. /*
  2.  * cut - a recreation of the Unix(Tm) cut(1) command.
  3.  *
 * syntax:  cut -cLIST [file1 ...]
 *          cut -fLIST [-dCHAR] [-s] [file1 ...]
  6.  *
  7.  *    Copyright (C) 1984 by David M. Ihnat
  8.  *
  9.  * This program is a total rewrite of the Bell Laboratories Unix(Tm)
  10.  * command of the same name, as of System V.  It contains no proprietary
  11.  * code, and therefore may be used without violation of any proprietary
  12.  * agreements whatsoever.  However, you will notice that the program is
  13.  * copyrighted by me.  This is to assure the program does *not* fall
  14.  * into the public domain.  Thus, I may specify just what I am now:
  15.  * This program may be freely copied and distributed, provided this notice
  16.  * remains; it may not be sold for profit without express written consent of
  17.  * the author.
  18.  * Please note that I recreated the behavior of the Unix(Tm) 'cut' command
  19.  * as faithfully as possible; however, I haven't run a full set of regression
  20.  * tests.  Thus, the user of this program accepts full responsibility for any
 * effects or loss; in particular, the author is not responsible for any losses,
 * explicit or incidental, that may be incurred through use of this program.
  22.  *
  23.  * I ask that any bugs (and, if possible, fixes) be reported to me when
  24.  * possible.  -David Ihnat (312) 784-4544 ihuxx!ignatz
  25.  */
  26.  
#include <stdio.h>

/* Declared here, but not referenced by any of the code in this file. */
extern int errno;
/*
 * Build switch, left disabled: when CPM is defined, main() takes the
 * program name from COMMAND instead of from argv[0].
 */
/*
#define CPM
*/
/* I'd love to use enums, but not everyone has them.  Portability, y'know. */
/* Error codes; each one selects a message in prerr(). */
#define BADLIST        1
#define NODELIM        2
#define NOFIELDS    3
#define USAGE        4
#define BADFILE        5
#define BACKERR        6
#define TOOLONG        7

#define    TAB    '\t'    /* Default field delimiter */
#define BACKSP    0x8     /* Backspace; given special treatment in -c mode */
#define    _MAXSZ    512  /* Size of both line buffers and of fields[] */
#define COMMAND "cut"     /* Program name used in messages when CPM is defined */

/* The two values stored in fields[]: skip the position, or output it. */
#define    IGNOREIT    0
#define CUTIT        1

char outbuf[_MAXSZ];            /* Processed output buffer */
char rawbuf[_MAXSZ];            /* Raw holding buffer for field mode */
/*
 * fields[0] is not a real position.  setflds() uses it while parsing to
 * remember where an open-ended "n-" range starts, and leaves it non-zero
 * on return only if at least one position was selected; main() tests it
 * for that purpose.
 */
#define    FLDFLAG    fields[0]        /* Used for EOL processing */
/* fields[n], n >= 1, is CUTIT when column/field n is to be output. */
short int fields[_MAXSZ];        /* Max number of fields or line length */

char *cmdnam;                    /* Program name printed by prerr() */

/* Set by main() when the -c, -f and -s options are seen, respectively. */
short int cflag,fflag,sflag;
char delim = TAB;                /* Field delimiter; replaced by -d */
  59.  
  60. main(argc,argv)
  61. int argc;
  62. char **argv;
  63. {
  64.     FILE *fileptr;
  65.     FILE *fopen();
  66.     int filecnt;
  67.  
  68.     cflag = fflag = sflag = 0;
  69.  
  70. #ifdef CPM
  71.     cmdnam = COMMAND;
  72. #else
  73.     cmdnam = *argv;
  74. #endif
  75.  
  76.     /* Skip invocation name */
  77.     argv++;
  78.     argc--;
  79.  
  80.     /* Most compilers initialize storage to zero; but don't count on it. */
  81.  
  82.     for(filecnt = 0;filecnt < _MAXSZ;filecnt++)
  83.         fields[filecnt] = IGNOREIT;
  84.  
  85.     /* First, parse input options */
  86.  
  87.     while(argv[0][0] == '-')
  88.     {
  89.         switch(argv[0][1])
  90.         {
  91.             case 'c':
  92.             case 'C':
  93.                 /* Build the character position list */
  94.                 if(fflag || cflag)
  95.                     prerr(USAGE,NULL);
  96.                 else
  97.                 {
  98.                     cflag++;
  99.                     setflds(&argv[0][2]);
  100.                 }
  101.                 break;
  102.  
  103.             case 'f':
  104.             case 'F':
  105.                 /* Build the field position list */
  106.                 if(fflag || cflag)
  107.                     prerr(USAGE,NULL);
  108.                 else
  109.                 {
  110.                     fflag++;
  111.                     setflds(&argv[0][2]);
  112.                 }
  113.                 break;
  114.  
  115.             case 'd':
  116.             case 'D':
  117.                 /* New delimiter */
  118.                 delim = argv[0][2];
  119.                 if(delim == '\0')
  120.                     prerr(NODELIM,NULL);
  121.  
  122.                 break;
  123.  
  124.             case 's':
  125.             case 'S':
  126.                 sflag++;
  127.                 break;
  128.  
  129.             default:
  130.                 prerr(USAGE,NULL);
  131.         }
  132.         argv++;
  133.         argc--;
  134.     }
  135.  
  136.     /* Finished all setup.  If no fields selected, tell them and exit. */
  137.     if(!(cflag | fflag))
  138.         prerr(BADLIST,NULL);
  139.  
  140.     if(!FLDFLAG)
  141.         prerr(NOFIELDS,NULL);
  142.  
  143.     /*
  144.      * If no files specified, process stdin.  Otherwise,
  145.      * process on a file-by-file basis.
  146.      */
  147.      if(argc == 0)
  148.         dofile(stdin);
  149.     else
  150.         for(filecnt = 0;filecnt < argc;filecnt++,argv++)
  151.             if((fileptr = fopen(argv[0],"r")) == (FILE *)NULL)
  152.                 prerr(BADFILE,argv);
  153.             else
  154.             {
  155.                 dofile(fileptr);
  156.                 fclose(fileptr);
  157.             }
  158. }
  159.  
  160. setflds(fldstr)
  161. char *fldstr;
  162. {
  163.     /*
  164.      * The string, character or field, must have one of the 
  165.      * following formats:
  166.      *
  167.      *    n
  168.      *    n,m[,...]    where n<m
  169.      *    a-b        where a<b
  170.      *    -n,m        where n<m; implies 1-n
  171.      *    n-        where - implies to end of line or last field
  172.      */
  173.     int index,minflag,value,fldset;
  174.  
  175.     minflag = 0;
  176.     value = 0;
  177.     index = 1;
  178.     FLDFLAG = 0;
  179.  
  180.     for(;;)
  181.     {
  182.         switch(*fldstr)
  183.         {
  184.             case '-':
  185.                 /* Starting a range */
  186.                 if(minflag)
  187.                     prerr(BADLIST,NULL);
  188.                 minflag++;
  189.                 fldstr++;
  190.  
  191.                 if(value)
  192.                 {
  193.                     if(value >= _MAXSZ)
  194.                         prerr(BADLIST,NULL);
  195.  
  196.                     index = value;
  197.                 }else
  198.                     index = 1;
  199.  
  200.                 value = 0;
  201.                 break;
  202.             
  203.             case ',':
  204.             case '\0':
  205.                 /* Ending the string, or this field/column sublist */
  206.                 if(minflag) /* Some damnable range */
  207.                 {    /* Ranges are nasty.  Possibles:
  208.                      * -n,a-n,n-.  In any case, index
  209.                      * contains the start of the range.
  210.                      */
  211.                     if(!value)
  212.                     {    /* From index to EOL */
  213.  
  214.                         FLDFLAG = index;
  215.                         fldset++;
  216.                         value = 0;
  217.                     }else
  218.                     {
  219.                         if(value >= _MAXSZ)
  220.                             prerr(BADLIST,NULL);
  221.  
  222.                         if(value < index)
  223.                             prerr(BADLIST,NULL);
  224.  
  225.                         /* Already a TOEOL sequence? */
  226.                         if(FLDFLAG)
  227.                         {
  228.                             /*
  229.                              * Yes.  Now...is the new sequence already
  230.                              * contained by the old one? If so, no processing
  231.                              * is necessary.
  232.                              */
  233.                             if(FLDFLAG > index)
  234.                             {
  235.                                 /*
  236.                                  * No, the new sequence starts before the old.
  237.                                  * Does the range extend into the current
  238.                                  * EOL range? If so, simply move the EOL marker.
  239.                                  */
  240.                                 if(FLDFLAG < value)
  241.                                 {
  242.                                     FLDFLAG = index;
  243.                                 }else
  244.                                     /* Simple range. Fill it. */
  245.                                     for(; index <= value ;index++)
  246.                                         fields[index] = CUTIT;
  247.  
  248.                                 /* In any case, some fields were selected. */
  249.                                 fldset++;
  250.                             }
  251.                         }else    /* Ok, no TOEOL sequence */
  252.                         {
  253.                             for(;index <= value;index++)
  254.                             {
  255.                                 fields[index] = CUTIT;
  256.                             }
  257.                             fldset++;
  258.                         }
  259.                         value = 0;
  260.                     }
  261.                     minflag = 0;    /* Reset the field-in-progress flag. */
  262.                 }else
  263.                     if(value)
  264.                     {
  265.                         if(value >= _MAXSZ)
  266.                             prerr(BADLIST,NULL);
  267.  
  268.                         fields[value] = CUTIT;
  269.                         value = 0;
  270.                         fldset++;
  271.                     }
  272.  
  273.  
  274.                 if(*fldstr == '\0')
  275.                 {
  276.                     /*
  277.                      * Last bit of processing.  If there was an EOL,
  278.                      * fill the array from the EOL point.  In any case,
  279.                      * if there were any fields selected, leave the FLDFLAG
  280.                      * value non-zero on return.
  281.                      */
  282.                     if(FLDFLAG)
  283.                         for(index = FLDFLAG; index < _MAXSZ; index++)
  284.                             fields[index] = CUTIT;
  285.  
  286.                     if(fldset)
  287.                         FLDFLAG = 1;
  288.  
  289.                     return(0);
  290.                 }
  291.  
  292.                 fldstr++;
  293.                 break;
  294.  
  295.             default:
  296.                 if((*fldstr < '0' ) || (*fldstr > '9' ))
  297.                     prerr(BADLIST,NULL);
  298.  
  299.                 else
  300.                 {
  301.                     value = 10 * value + *fldstr - '0';
  302.                     fldstr++;
  303.                 }
  304.         }
  305.     }
  306. }
  307.  
  308. dofile(fno)
  309. FILE *fno;
  310. {
  311.     /*
  312.      * This will process the input files according to the rules specified
  313.      * in the fields array.
  314.      */
  315.  
  316.      int charcnt,poscnt,bflag,doneflag,fldfound;
  317.      register int c;
  318.  
  319.      char *inbufptr, *rawbufptr;
  320.  
  321.      do
  322.      {
  323.         inbufptr =  outbuf;
  324.         rawbufptr = rawbuf;
  325.         charcnt =  bflag = doneflag = fldfound = 0;
  326.         poscnt = 1;
  327.  
  328.         do
  329.         {
  330.             c = fgetc(fno);
  331.             if(c == EOF)
  332.             {
  333.                 /* That's it for this file or stream */
  334.                 doneflag++;
  335.                 break;
  336.             }
  337.  
  338.             if(cflag)
  339.             {
  340.                 /*
  341.                  * In character scan mode.  Look to see if
  342.                  * it's an NROFF-type underlined character;
  343.                  * if so, then don't count the backspace.
  344.                  * Notice that this could cause a buffer
  345.                  * overflow in the worst case situation...
  346.                  * but that's MOST unlikely.
  347.                  */
  348.  
  349.                 if(c == BACKSP)
  350.                 {
  351.                     if(bflag)
  352.                         prerr(BACKERR);
  353.                     else
  354.                     {
  355.                         bflag++;
  356.                         *inbufptr++ = c;
  357.                     }
  358.                 }else
  359.                 {
  360.                     /*
  361.                      * Valid character.  If it's to be sent,
  362.                      * stow it in the outbuffer.
  363.                      */
  364.                      bflag = 0;
  365.  
  366.                      if(++charcnt == (_MAXSZ - 1))
  367.                         prerr(TOOLONG);
  368.  
  369.                      if(fields[charcnt] && (c != '\n'))
  370.                         *inbufptr++ = c;
  371.                 }
  372.             }else
  373.             {
  374.                 /*
  375.                  * Field processing.  In this case, charcnt
  376.                  * does indicate processed characters on the
  377.                  * current line, but that is all.  Notice that
  378.                  * ALL characters are initially stowed in the
  379.                  * raw  buffer, until at least one field has
  380.                  * been found.
  381.                  */
  382.                  if(fields[poscnt])
  383.                  {
  384.                     /* Ok, working on a field.  It,
  385.                      * and its terminating delimiter,
  386.                      * go only into the processed buffer.
  387.                      */
  388.                      fldfound = 1;
  389.                      if(c != '\n')
  390.                          *inbufptr++ = c;
  391.                 }else
  392.                     if(!fldfound)
  393.                     {
  394.                         charcnt++;
  395.                         if(c != '\n')
  396.                             *rawbufptr++ = c;
  397.                     }
  398.                 /*
  399.                  * In any case, if a delimiter, bump the field
  400.                  * indicator.
  401.                  */
  402.                  if(c == delim)
  403.                     poscnt++;
  404.             }
  405.         }while(c != '\n');
  406.  
  407.         if((cflag && charcnt) || (fflag && fldfound))
  408.         {
  409.             /*
  410.              * No matter what mode, something was found. Print it.
  411.              */
  412.  
  413.             if(fflag && (*(inbufptr-1) == delim))
  414.                 --inbufptr; /* Supress trailing delimiter */
  415.  
  416.             *inbufptr = '\0'; /* But null-terminate the line. */
  417.             puts(outbuf);
  418.         }else
  419.             if((fflag && (!sflag)) && charcnt)
  420.             {
  421.                 /*
  422.                  * In this case, a line with some characters,
  423.                  * no delimiters, and no supression.  Print it.
  424.                  */
  425.  
  426.                  *rawbufptr = '\0';
  427.                  puts(rawbuf);
  428.             }
  429.  
  430.      }while(!doneflag);
  431. }
  432.  
  433. prerr(etype, estring)
  434. int etype;
  435. char *estring;
  436. {
  437.     switch(etype)
  438.     {
  439.         case BADLIST:
  440.             fprintf(stderr,"%s : bad list for c/f option\n",cmdnam);
  441.             break;
  442.  
  443.         case USAGE:
  444.             fprintf(stderr,"Usage: %s [-s] [-d<char>] {-c<list> | -f<list>} file ...\n",cmdnam);
  445.             break;
  446.  
  447.         case NOFIELDS:
  448.             fprintf(stderr,"%s : no fields\n",cmdnam);
  449.             break;
  450.  
  451.         case NODELIM:
  452.             fprintf(stderr,"%s : no delimiter\n",cmdnam);
  453.             break;
  454.  
  455.         case BADFILE:
  456.             fprintf(stderr,"Cannot open: %s : %s\n",cmdnam,estring);
  457.             break;
  458.         
  459.         case BACKERR:
  460.             fprintf(stderr,"%s : cannot handle multiple adjacent backspaces\n",cmdnam);
  461.             break;
  462.  
  463.         case TOOLONG:
  464.             fprintf(stderr,"%s : line too long\n",cmdnam);
  465.     }
  466.     exit(2);
  467. }
  468.